/* Gauss code used to generate the alternative beta estimates in 
Paolino, Philip.  2001.  "Maximum Likelihood Estimation of Models 
with Beta-Distributed Dependent Variables."  Political Analysis.
9:4 325-348.  The code assumes that any covariates included in the 
dispersion function are also included in the mean function */

library cml;
#include cml.ext;
CMLset;

__title=" ** A Beta Model, Numerical Solution **";
dta="/home/gov/faculty/ppaolino/parr/selden"; @ data file here @
let dep=minelig;                  @ dependent variable here @
let ind=efficncy respmin hardship minority;  @ covariates for mean here @
let indV=0;        @ covariates for dispersion here @
                   @ for constant disperion, set indV=0 @
let sel=0;         @ variable for selecting out observations here @

c={1,0,0,0,0};   /* variables to include from mean function, including 
                    constant, to include in the dispersion function for 
		    the purpose of calculating the first different of 
		    the variance function.  "Switch" element of vector 
		    to 1 for included variables */

if ind==0;
   vars=dep;
   elseif indV==0; 
   vars=dep|ind;    
   else;
   vars=dep|ind|indV;
endif;

dataset=listw(dta,vars,sel);  @ proc performs listwise deletion on the @
                              @ input dataset @

stval=   -4.8037|             @ vector of starting values @
	  2.7896|
	  0.1345|
	  2.6309|
	  0.1754|
	  1.2412;
	 
proc psi(x);           @ proc calls the digamma function @
  local p;
  x=x+6;
  p=1/(x.*x);
  p=(((0.004166666666667*p-0.003968253986254).*p+
      0.008333333333333).*p-0.083333333333333).*p;
  p=p+ln(x)-0.5/x-1/(x-1)-1/(x-2)-1/(x-3)-1/(x-4)-1/(x-5)-1/(x-6);
retp(p);
endp;

proc gradproc(theta,ds);  @ gradient function @
    local y,one,x,z,be,h,xb,zh,zh1,e,per1,gr1,gr2,grad;
    y=(ds[.,1]+.01)*100/10002;
    one=ones(rows(ds),1);
if ind==0;
    x=one;
    else;
    x=one~ds[.,2:rows(ind)+1];
endif;
if indV==0;
    z=one;	     
    else;
    z=one~ds[.,cols(ds)-rows(indV)+1:cols(ds)];
endif;		     
    be=theta[1:cols(x),1];
    h=trimr(theta,rows(be),0);      
    xb=exp(x*be);
    zh=exp(z*h);
    zh1=zh+1;
    e=xb./(1+xb);
    per1=(-x.*e+x.*(e^2)).*zh1;

    gr1=psi(e.*zh1+(1-e).*zh1-1).*(x.*e.*zh1-(e^2).*zh1.*x+per1)-
        psi(e.*zh1-e).*(x.*e.*zh1-(e^2).*zh1.*x-x.*e+(e^2).*x)-
        psi((1-e).*zh1-1+e).*(per1+x.*e-(e^2).*x)+
        (x.*e.*zh1-(e^2).*zh1.*x-x.*e+(e^2).*x).*ln(y)+
        (per1+x.*e-(e^2).*x).*ln(1-y);
    gr2=psi(e.*zh1+(1-e).*zh1-1).*(e.*z.*zh+(1-e).*z.*zh)-
        (psi(e.*zh1-e).*xb.*z.*zh)./(1+xb)-
        psi((1-e).*zh1-1+e).*(1-e).*z.*zh+xb.*z.*zh.*ln(y)./(1+xb)+
        (1-e).*z.*zh.*ln(1-y);
    grad=gr1~gr2;
    retp(grad);endp;

proc loglik(theta,ds);   @ likelihood function @
    local y,one,x,z,be,h,xb,zh,e,v,a,b,llik,ds1;
		     
    y=(ds[.,1]+.01)*100/10002;
    one=ones(rows(ds),1);
if ind==0;
    x=one;
    else;
    x=one~ds[.,2:rows(ind)+1];
endif;
if indV==0;
    z=one;	     
    else;
    z=one~ds[.,cols(ds)-rows(indV)+1:cols(ds)];
endif;		     
    be=theta[1:cols(x),1];            @ parameters for the mean function @
    h=trimr(theta,rows(be),0);        @ parameters for dispersion function @
    xb=exp(x*be);
    zh=exp(z*h);
    e=xb./(1+xb);
    v=e.*(1-e).*(1/(zh+1));
    a=((e^2).*(1-e))./v-e;
    b=(e.*(1-e)^2)./v-(1-e);
  if sumc(a.<0)>0; print "a failed";;zh; endif;
  if sumc(b.<0)>0; print "b failed";;zh; endif;
    llik=lng(a+b)-lng(a)-lng(b)+(a-1).*ln(y)+(b-1).*ln(1-y);
    retp(llik);endp;

    if ind==0;
      _cml_ParNames="const"|"const";
    elseif indV==0;
      _cml_ParNames="const"|ind|"const";
    else;
      _cml_ParNames="const"|ind|"const"|indV;
    endif;

_cml_GradCheck=1;
_cml_GradProc=&gradproc;  
@ _cml_Options = { none }; @

t=dta $+ dep $+ ".alt.out"; 
output file=^t reset; 
    print "dependent variable=" ;; $dep; 
    {b,logl,g,vc,ret}=CMLprt(CML(dataset,0,&loglik,stval));

/* Use results to obtain first differences */    
    
/* set ind and indV matrices */
y=(dataset[.,1]+.01)*100/10002;
one=ones(rows(dataset),1);

if ind==0;
  x=one;
  else;
  x=one~dataset[.,2:rows(ind)+1]; 
endif;

if indV==0;
  z=one;
  else;
  z=x;
endif;		     

/* get predicted values of y */
    bb=trimr(b,0,1);
    if indV/=0;
       bb=trimr(b,0,rows(indV)+1);
    endif;
    xb=exp(x*bb);
    eyb=xb./(1+xb);

/* set descriptives for x and z */
meanx=meanc(x);  @ these are k1X1 vectors @
stdx=stdc(x);	
minx=minc(x);	
maxx=maxc(x);	

meanz=meanc(z);
stdz=stdc(z);
minz=minc(z);
maxz=maxc(z);

/* create switching parameter */

if ind/=0;
sw=zeros(1,cols(x)-1)|eye(cols(x)-1);
xl=meanx-stdx.*sw;           @ these are kXk-1 vectors @
xh=meanx+stdx.*sw;       
xmin=meanx.*(1-sw)+minx.*sw;  
xmax=meanx.*(1-sw)+maxx.*sw;
zl=xl; zh=xh; zmin=xmin; zmax=xmax;
endif;

/* generate the parameter vectors */
    be=b[1:cols(x),.];      @ chop off parms for alpha (k1X1) @
    h=trimr(b,rows(be),0);  @ chop off parms for beta (k2X1) @

/* get predicted means */

    em=(exp(meanx'*be)./(1+exp(meanx'*be)))'; @ these are nX1 vectors @
    if ind/=0;
      emin=(exp(xmin'*be)./(1+exp(xmin'*be)))';
      emax=(exp(xmax'*be)./(1+exp(xmax'*be)))';
      el=(exp(xl'*be)./(1+exp(xl'*be)))';
      eh=(exp(xh'*be)./(1+exp(xh'*be)))';
    endif;
  
/* get first differences for the mean */
    if ind/=0;
    fdm2=eh-el;
    fdmm=emax-emin;
    endif;
    
/* get predicted variances for variables affecting the mean */

    j=1; i=1; hx=zeros(rows(c),1); 
    do until j>rows(c);
       if c[j]==1;
	 hx[j]=hx[j]+h[i];
	 i=i+1;
       endif;
    j=j+1;
    endo;

  if indV==0;
    hx=h;
  endif;
    dm=(1/(exp(meanz'*hx)+1))';
    vm=em.*(1-em).*dm;

      dmin=(1/(exp(zmin'*hx)+1))';
      dmax=(1/(exp(zmax'*hx)+1))';
      dl=(1/(exp(zl'*hx)+1))';
      dh=(1/(exp(zh'*hx)+1))';
      vmin=emin.*(1-emin).*dmin;
      vmax=emax.*(1-emax).*dmax;
      vl=el.*(1-el).*dl;
      vh=eh.*(1-eh).*dh;

      fdv2=vh-vl;
      fdvm=vmax-vmin;

      
/* print out the first differences for the mean function, evaluated at 
   both the 1 std dev +/- the mean and at the minimum and maximum values */
   
  if ind/=0;
    print $ind;; meanc(fdm2)~meanc(fdmm);
  endif;

/* print out the first differences for the variance function, evaluated at 
   both the 1 std dev +/- the mean and at the minimum and maximum values, 
   along with the corresponding standard errors */ 
   
    print $ind;; meanc(fdv2)~stdc(fdv2)~meanc(fdvm)~stdc(fdvm);
    
format /rd 8,5;

print "alpha=";; ((em^2).*(1-em))./vm-em;
print "beta=";; (em.*(1-em)^2)./vm-(1-em);
alp=((em^2).*(1-em))./vm-em;
bet=(em.*(1-em)^2)./vm-(1-em);
print "ave mean=";; alp/(alp+bet);
print "ave var=";; alp*bet/((alp+bet)^2*(alp+bet+1)); 

/* get mse of y */
mseb=sumc((y-eyb)^2)/rows(y);

print "extreme vals of y,Ey_beta";
minc(y)~maxc(y)~minc(eyb)~maxc(eyb);

output off;

